//importing gdp 1998-2024
import excel "Data/nama_10_gdp__custom_15374211_spreadsheet.xlsx", sheet("Sheet 1") firstrow cellrange(A9:AB54) clear
rename TIME country
replace country = subinstr(country, "*", "", .) if country == "Kosovo*"
drop if country == "GEO (Labels)"
destring B - AB, replace force
foreach v of varlist B-AB {
   local x : variable label `v'
   if !_rc {
        local x = string(`x', "%12.0f")
    }
   local x_clean = "gdp" + "`x'"
   rename `v' `x_clean'
}
	*transform long format here
reshape long gdp, i(country) j(year)
drop if gdp==.
	*(84 observations deleted)
	*million euro unit to euro unit
replace gdp = gdp*1000000
tostring year, replace
save "Data/cleaned_data/CountryGDP.dta", replace
drop if country != "Euro area (EA11-1999, EA12-2001, EA13-2007, EA15-2008, EA16-2009, EA17-2011, EA18-2014, EA19-2015, EA20-2023)"
destring year, replace force
drop country
rename gdp EUgdp
save "Data/cleaned_data/EUGDP.dta", replace

//importing expenditure
import excel "Data/gov_10a_main__custom_15374330_spreadsheet.xlsx", sheet("Sheet 1") firstrow cellrange(A10:AA44) clear
rename TIME country
drop if country == "GEO (Labels)"
foreach v of varlist B-AA {
   local x : variable label `v'
   if !_rc {
        local x = string(`x', "%12.0f")
    }
   local x_clean = "expenditure" + "`x'"
   rename `v' `x_clean'
}
reshape long expenditure, i(country) j(year)
drop if expenditure==.
	*(0 observations deleted)
replace expenditure = expenditure*1000000
save "Data/cleaned_data/CountryExpenditure.dta", replace
	*uk expenditure
import excel "Data/imf-dm-export-20250310 (2).xls", sheet("exp") cellrange(A1:HQ3) firstrow clear
foreach v of varlist B-HQ {
   local x : variable label `v'
   if !_rc {
        local x = string(`x', "%12.0f")
    }
   local x_clean = "yr" + "`x'"
   rename `v' `x_clean'
}
drop yr1800-yr1997
rename Governmentexpenditurepercent country
reshape long yr, i(country) j(year)
rename yr expenditurePercentGDP
drop if expenditurePercentGDP==.
tostring year, replace
save "Data/cleaned_data/ukExpenditurePercentGDP.dta", replace
use "Data/cleaned_data/CountryGDP.dta", clear
keep if country == "United Kingdom"
merge 1:1 country year using "Data/cleaned_data/ukExpenditurePercentGDP.dta"
replace gdp = gdp * expenditurePercentGDP/100
rename gdp expenditure
drop expenditurePercentGDP _merge
drop if expenditure ==. 
save "Data/cleaned_data/ukExpenditure.dta", replace
use "Data/cleaned_data/CountryExpenditure.dta", clear
tostring year, replace
append using "Data/cleaned_data/ukExpenditure.dta"
encode country, gen(country_id)
save "Data/cleaned_data/CountryExpenditure.dta", replace

//importing revenue
import excel "Data/gov_10a_main__custom_15374330_spreadsheet.xlsx", sheet("Sheet 2") firstrow cellrange(A10:AA44) clear
rename TIME country
drop if country == "GEO (Labels)"
foreach v of varlist B-AA {
   local x : variable label `v'
   if !_rc {
        local x = string(`x', "%12.0f")
    }
   local x_clean = "revenue" + "`x'"
   rename `v' `x_clean'
}
reshape long revenue, i(country) j(year)
drop if revenue==.
	*(0 observations deleted)
replace revenue = revenue*1000000
save "Data/cleaned_data/CountryRevenue.dta", replace
	*uk revenue
import excel "Data/imf-dm-export-20250310 (1).xls", sheet("rev") cellrange(A1:HQ3) firstrow clear
foreach v of varlist B-HQ {
   local x : variable label `v'
   if !_rc {
        local x = string(`x', "%12.0f")
    }
   local x_clean = "yr" + "`x'"
   rename `v' `x_clean'
}
drop yr1800-yr1997
rename GovernmentrevenuepercentofG country
reshape long yr, i(country) j(year)
rename yr revenuePercentGDP
drop if revenuePercentGDP==.
tostring year, replace
save "Data/cleaned_data/ukRevenuePercentGDP.dta", replace
use "Data/cleaned_data/CountryGDP.dta", clear
keep if country == "United Kingdom"
merge 1:1 country year using "Data/cleaned_data/ukRevenuePercentGDP.dta"
replace gdp = gdp * revenuePercentGDP/100
rename gdp revenue
drop revenuePercentGDP _merge
drop if revenue ==. 
save "Data/cleaned_data/ukRevenue.dta", replace
use "Data/cleaned_data/CountryRevenue.dta", clear
tostring year, replace
append using "Data/cleaned_data/ukRevenue.dta"
encode country, gen(country_id)
save "Data/cleaned_data/CountryRevenue.dta", replace

//import gross debt (1/3)
import excel "Data/gov_10dd_edpt1__custom_15374849_spreadsheet.xlsx", sheet("Sheet 1") firstrow cellrange(A10:AA41) clear
rename TIME country
drop if country == "GEO (Labels)"
destring B - C, replace force
foreach v of varlist B-AA {
   local x : variable label `v'
   if !_rc {
        local x = string(`x', "%12.0f")
    }
   local x_clean = "debt" + "`x'"
   rename `v' `x_clean'
}
reshape long debt, i(country) j(year)
drop if debt==.
	*(4 observations deleted)
replace debt = debt*1000000
save "Data/cleaned_data/CountryGrossDebt.dta", replace
	*uk gross debt
import excel "Data/rftm18tables.xlsx", sheet("M1") firstrow cellrange(A5) clear
drop G-R
drop in 1/39
drop in 27/157
keep Timeperiod Generalgovernmentgrossconsoli
rename Timeperiod year
rename Generalgovernmentgrossconsoli debt
destring debt, replace force
replace debt = debt*1000000
gen country = "United Kingdom"
save "Data/cleaned_data/ukGrossDebt.dta", replace
use "Data/cleaned_data/CountryGrossDebt.dta", clear
tostring year, replace
append using "Data/cleaned_data/ukGrossDebt.dta"
encode country, gen(country_id)
save "Data/cleaned_data/CountryGrossDebt.dta", replace
collapse (sum) debt, by(year)
rename debt EUdebt
destring year, replace force
save "Data/cleaned_data/EUGrossDebt.dta", replace

//import gross debt securities (face value) (2/3)
import excel "Data/gov_10dd_edpt1__custom_15374849_spreadsheet.xlsx", sheet("Sheet 2") firstrow cellrange(A10:AA41) clear
rename TIME country
drop if country == "GEO (Labels)"
destring B - C, replace force
foreach v of varlist B-AA {
   local x : variable label `v'
   if !_rc {
        local x = string(`x', "%12.0f")
    }
   local x_clean = "debtfv" + "`x'"
   rename `v' `x_clean'
}
reshape long debtfv, i(country) j(year)
drop if debtfv==.
	*(6 observations deleted)
replace debtfv = debtfv*1000000
encode country, gen(country_id)
save "Data/cleaned_data/CountryGrossDebtSecurity.dta", replace

//import gross debt securities long term (face value) (3/3)
import excel "Data/gov_10dd_edpt1__custom_15374849_spreadsheet.xlsx", sheet("Sheet 3") firstrow cellrange(A10:AA41) clear
rename TIME country
drop if country == "GEO (Labels)"
destring B - C, replace force
foreach v of varlist B-AA {
   local x : variable label `v'
   if !_rc {
        local x = string(`x', "%12.0f")
    }
   local x_clean = "debtltfv" + "`x'"
   rename `v' `x_clean'
}
reshape long debtltfv, i(country) j(year)
drop if debtltfv==.
	*(6 observations deleted)
replace debtltfv = debtltfv*1000000
encode country, gen(country_id)
save "Data/cleaned_data/CountryGrossDebtSecurityLongTerm.dta", replace
	
//import sovereign yield
import delimited "Data/09-03-25 07_07_47_theglobaleconomy.csv", clear 
drop code continentcode
replace longterminterestrate = longterminterestrate / 100
drop if longterminterestrate==.
	*(34 observations deleted)
rename longterminterestrate sInterestRate
	*in generating an annual average, no winsorisation because all deviations and outliers fully reflect the state's stability and debt sustainability, unlike exchange rate winsorisation where extreme market exvents do not necessarily reflect the inherent competitiveness of the currencies. (valid arguement. I want this number to reflect the full pain of high interest rate)
bysort country year: gen total_obs = _N
bysort country year: gen sInterestRateAvgAnnual = sum(sInterestRate)
bysort country year: replace sInterestRateAvgAnnual = sInterestRateAvgAnnual[_N]
bysort country year: replace sInterestRateAvgAnnual = sInterestRateAvgAnnual/total_obs
drop month sInterestRate total_obs
duplicates drop
tostring year, replace
save "Data/cleaned_data/CountryBondYieldLongTerm.dta", replace
	*Estonia didn't issue any long term bonds until 2020, therefore interest rate starts in 2020
	//Cyprus
	*nov2015 to current
import delimited "Data/Cyprus 10-Year Bond Yield Historical Data.csv", clear 
drop open high low change
gen country = "Cyprus"
generate date2=date[_n+1]
list in 1/20
replace date2="11/01/2015" if date =="12/01/2015"
drop date
gen month = substr(date2, 1, strpos(date2, "/") - 1)
gen year = substr(date2, -4, 4)
drop date2
rename price rate
replace rate = rate/100
save "Data/cleaned_data/CyprusBondYieldLongTerm.dta", replace
	*jan2001 to oct2015
import excel "Data/CY Interest Rate Harmonised Long Term 10 years.xlsx", sheet("My Series") cellrange(A30:B319) clear
rename B rate
replace rate = rate /100
gen country = "Cyprus"
gen date = string(A, "%td")
replace date = substr(date, 3, .)
drop A
generate date2=date[_n-1]
drop in 1
drop date
gen month = substr(date2, 1, 3)
gen year = substr(date2, -4, 4)
drop date2
drop if _n > 178
append using "Data/cleaned_data/CyprusBondYieldLongTerm.dta"
sort year month
bysort year: gen total_obs = _N
bysort year: gen sInterestRateAvgAnnual = sum(rate)
bysort year: replace sInterestRateAvgAnnual = sInterestRateAvgAnnual[_N]
bysort year: replace sInterestRateAvgAnnual = sInterestRateAvgAnnual/total_obs
drop month rate total_obs
duplicates drop
save "Data/cleaned_data/CyprusBondYieldLongTerm.dta", replace
	//Malta
	*march2008 to current
import delimited "Data/Malta 10-Year Bond Yield Historical Data.csv", clear
drop open high low change
gen country = "Malta"
	*shifting month back by 1
generate date2=date[_n+1]
list in 1/20
replace date2="03/01/2008" if date =="04/01/2008"
drop date
gen month = substr(date2, 1, strpos(date2, "/") - 1)
gen year = substr(date2, -4, 4)
drop date2
rename price rate
replace rate = rate/100
save "Data/cleaned_data/MaltaBondYieldLongTerm.dta", replace
	*jan1999 to dec2013
import excel "Data/financial_market_int_rates.xls", sheet("Historic Rates- Jan 99 - Dec 13") clear
keep if inlist(_n, 5, 6, 57)
local i = 1
foreach var of varlist C-FZ {
    rename `var' time`i'
    local i = `i' + 1
}
drop A
rename B temp
replace temp = "year" in 1
replace temp = "month" in 2
reshape long time, i(temp) j(num)
replace time = time[_n-1] if missing(time) & temp == "year"
gen month_ = time if temp == "month"
gen month = month_[_n+180]
gen year_ = time if temp == "year"
gen year = year_[_n+360]
drop temp num month_ year_
drop if _n > 180
gen rate = real(time)
drop time
sort year
drop if _n<45
keep if _n <= 108 | (year == "2008" & (month == "February" | month == "January"))
replace rate = rate/100
append using "Data/cleaned_data/MaltaBondYieldLongTerm.dta"
replace country = "Malta" if country==""
sort year
bysort year: gen total_obs = _N
bysort year: gen sInterestRateAvgAnnual = sum(rate)
bysort year: replace sInterestRateAvgAnnual = sInterestRateAvgAnnual[_N]
bysort year: replace sInterestRateAvgAnnual = sInterestRateAvgAnnual/total_obs
drop month rate total_obs
duplicates drop
save "Data/cleaned_data/MaltaBondYieldLongTerm.dta", replace
	//merging all bond yields
use "Data/cleaned_data/CountryBondYieldLongTerm.dta", clear
append using "Data/cleaned_data/CyprusBondYieldLongTerm.dta"
append using "Data/cleaned_data/MaltaBondYieldLongTerm.dta"
destring(year), replace
save "Data/cleaned_data/CountryBondYieldLongTerm.dta", replace

//import EU bond yield
import delimited "Data/ECB Data Portal_20250212174517.csv", clear 
drop date
rename euroarea10yearsgovernmentbenchma euInterestRate
gen year = substr(timeperiod, 1, 4)
drop timeperiod
drop if year =="2025"
bysort year: gen total_obs = _N
bysort year: gen euInterestRateAvgAnnual = sum(euInterestRate)
bysort year: replace euInterestRateAvgAnnual = euInterestRateAvgAnnual[_N]
bysort year: replace euInterestRateAvgAnnual = euInterestRateAvgAnnual/total_obs
drop euInterestRate total_obs
replace euInterestRateAvgAnnual = euInterestRateAvgAnnual/100
duplicates drop
drop in 1/28
save "Data/cleaned_data/euBondYieldLongTerm.dta", replace

//=============Default Probability=============
//import sovereign bond ratings
import delimited "Data/09-03-25 07_05_57_theglobaleconomy.csv", clear 
drop outlook
drop if rating ==""
	*(1 observation deleted)
tab agency /*descrptive stats for 4 raters*/
duplicates drop country year month agency, force
	*(51 observations deleted) /*slight problem because it ommits rating published in the same month by same rater after the first time*/
replace agency = "Moodys" if agency == "Moody's"
replace agency = "SaP" if agency == "S&P"
reshape wide rating, i(country year month) j(agency) string
	*filling in missing months and years
tostring year month, replace
gen yrmo= year+"m"+month
gen date = monthly(yrmo, "YM")
format date %tm
encode country, gen(countryId)
tsset countryId date
tsfill
bysort countryId (date): replace country = country[_n-1] if missing(country)
bysort countryId (date): replace code = code[_n-1] if missing(code)
	*assuming rating is unchanged until next update
bysort countryId (date): replace ratingFitch = ratingFitch[_n-1] if missing(ratingFitch)
	*(7,226 real changes made)
bysort country (date): replace ratingMoodys = ratingMoodys[_n-1] if missing(ratingMoodys)
	*(7,630 real changes made)
bysort country (date): replace ratingSaP = ratingSaP[_n-1] if missing(ratingSaP)
	*(7,786 real changes made)
bysort country (date): replace ratingScope = ratingScope[_n-1] if missing(ratingScope)
	*(2,221 real changes made)
gen date_text = string(date, "%tm")
replace year = substr(date_text, 1, 4)
replace month = substr(date_text, 6, .)
drop date_text yrmo
	*clean out "NR" Not Rated, but keeping as empty cells (it's still important info)
foreach var in ratingFitch ratingMoodys ratingSaP ratingScope {
	replace `var' = "" if `var' == "NR"
}
save "Data/cleaned_data/CountryRatings.dta", replace

//import bond ratings and default rates (10yr treasury)
import excel "Data/Ratings & Default Probability.xlsx", sheet("Moodys") firstrow cellrange(A1:B13) clear
save "Data/cleaned_data/DefaultRatesMoodys.dta", replace
import excel "Data/Ratings & Default Probability.xlsx", sheet("S&P") firstrow cellrange(A1:B13) clear
save "Data/cleaned_data/DefaultRatesS&P.dta", replace
import excel "Data/Ratings & Default Probability.xlsx", sheet("Fitch") firstrow cellrange(A1:B23) clear
save "Data/cleaned_data/DefaultRatesFitch.dta", replace
import excel "Data/Ratings & Default Probability.xlsx", sheet("Scope") firstrow cellrange(A1:B11) clear
save "Data/cleaned_data/DefaultRatesScope.dta", replace

//merging bond ratings and default rates (Fitch 1/4)
use "Data/cleaned_data/CountryRatings.dta"
gen defaultRatesFitch = .
gen defaultRatesMoodys = .
gen defaultRatesSaP = .
gen defaultRatesScope = .
	*"SD" Selectively Defaulted (country chose to partially default its debt): I assume the default rate to be 1 (already defaulted) if "SD"
foreach var in defaultRatesSaP {
	replace `var' = 1 if ratingSaP == "SD"
}
	*(4 real changes made)
	*ratings need to be cleaned, ignoring difference within the tranche: e.g. "A3" or "A-" would be "A"
replace ratingFitch= strtrim(ratingFitch)
merge m:1 ratingFitch using "Data/cleaned_data/DefaultRatesFitch.dta"
drop if _merge == 2
replace defaultRatesFitch = Year10
drop Year10 _merge
sort country year month
save "Data/cleaned_data/CountryRatings.dta", replace

//merging bond ratings and default rates (Moody's 2/4)	
replace ratingMoodys= strtrim(ratingMoodys)
replace ratingMoodys = regexr(ratingMoodys, "([0-9])$", "")
	*(5,875 real changes made)
merge m:1 ratingMoodys using "Data/cleaned_data/DefaultRatesMoodys.dta"
drop if _merge == 2
replace defaultRatesMoodys = Year10
drop Year10 _merge
sort country year month
save "Data/cleaned_data/CountryRatings.dta", replace

//merging bond ratings and default rates (S&P 3/4)	
replace ratingSaP= strtrim(ratingSaP)
replace ratingSaP = regexr(ratingSaP, "([+-])$", "")
	*(3,922 real changes made)
merge m:1 ratingSaP using "Data/cleaned_data/DefaultRatesS&P.dta"
drop if _merge == 2
replace defaultRatesSaP = Year10 if defaultRatesSaP ==.
drop Year10 _merge
sort country year month
save "Data/cleaned_data/CountryRatings.dta", replace

//merging bond ratings and default rates (Scope 4/4)	
replace ratingScope= strtrim(ratingScope)
replace ratingScope = regexr(ratingScope, "([+-])$", "")
	*(1,321 real changes made)
merge m:1 ratingScope using "Data/cleaned_data/DefaultRatesScope.dta"
drop if _merge == 2
replace defaultRatesScope = Year10 
drop Year10 _merge
sort country year month
save "Data/cleaned_data/CountryRatings.dta", replace
	*avg of the 4 raters
gen defaultRateAvg = cond(!missing(defaultRatesFitch), defaultRatesFitch, 0) + cond(!missing(defaultRatesMoodys), defaultRatesMoodys, 0) + cond(!missing(defaultRatesSaP), defaultRatesSaP, 0) + cond(!missing(defaultRatesScope), defaultRatesScope, 0)
gen non_missing_count = (defaultRatesFitch < .) + (defaultRatesMoodys < .) + (defaultRatesSaP < .) + (defaultRatesScope < .)
replace defaultRateAvg = defaultRateAvg / non_missing_count if non_missing_count!=0
replace defaultRateAvg = . if non_missing_count==0
drop non_missing_count
save "Data/cleaned_data/CountryRatings.dta", replace

//making an annual default rate
drop ratingFitch ratingMoodys ratingSaP ratingScope defaultRatesFitch defaultRatesMoodys defaultRatesSaP defaultRatesScope
	*I have assumed that non-reported months have the same rating as last reporting instance, unless it's specified as a month with missing data
bysort country year: gen non_missing_months = sum(!missing(defaultRateAvg))
bysort country year: replace non_missing_months = non_missing_months[_N]
bysort country year: gen defaultRateAvgAnnual = sum(cond(!missing(defaultRateAvg), defaultRateAvg, 0))
bysort country year: replace defaultRateAvgAnnual = defaultRateAvgAnnual[_N]
replace defaultRateAvgAnnual = defaultRateAvgAnnual / non_missing_months
replace defaultRateAvgAnnual = . if defaultRateAvg ==.
drop non_missing_months
save "Data/cleaned_data/CountryRatingsFull.dta", replace
drop month code date defaultRateAvg
duplicates drop country year defaultRateAvgAnnual, force
drop if defaultRateAvg==.
save "Data/cleaned_data/CountryRatings.dta", replace
//=====================================

//rho: financial assistance 
import excel "Data/Financial Assistance ESM.xlsx", sheet("AssistanceAmount") firstrow clear
	*IMF is not EU assistance
drop IMFbn Source
rename Country country
foreach var of varlist _all {
    capture confirm string variable `var'
    if _rc == 0 continue 
    replace `var' = 0 if missing(`var')
}
gen assistanceAmount = 1000000000*(EFSFbn + ESMbn + ECbn + BilateralLoansbn)
drop EFSFbn ESMbn ECbn BilateralLoansbn
save "Data/cleaned_data/AssistanceAmount.dta", replace

//covid PEPP assistance package amount
*negative numbers
import delimited "Data/PEPP_public_sector_securities_breakdown_history.csv", varnames(3) clear 
keep in 1/21
rename v1 country
foreach var of varlist mar20-feb25 {
    local newname = "amount" + "`var'"
    rename `var' `newname'
}
reshape long amount, i(country) j(monthYear) string
destring amount, replace
replace amount = amount *1000000
rename cumulative totalCurrentHoldings
replace totalCurrentHoldings = totalCurrentHoldings *1000000
gen month = substr(monthYear, 1, 3)
gen year  = substr(monthYear, 4, .)
drop monthYear
replace year = "20" + year
	*negative amounts are non-cash amortisation adjustments, shouldn't be counted. 
replace amount = 0 if amount<0
bysort country year (amount): gen covidAssistanceAmount = sum(amount)
by country year: replace covidAssistanceAmount = covidAssistanceAmount[_N]
drop amount month
duplicates drop
drop if country=="Total"
save "Data/cleaned_data/PEPPAmounttemp.dta", replace

//Other Datasets (not in master.dta)
//EURO/USD exchange rate
import excel "Data/DEXUSEU.xlsx", sheet("Daily") firstrow clear
gen str_date = string(observation_date, "%td")
gen day = substr(str_date, 1, 2) 
gen month = substr(str_date, 3, 3) 
gen year = substr(str_date, 6, 4) 
rename DEXUSEU euro_usdEX
drop if euro_usdEX==.
	*(264 observations deleted)
	*the data is how many USD is worth for 1 euro, now it's how many euro is 1 USD worth
replace euro_usdEX=1/euro_usdEX
egen group = group(year)
gen winsorised = .
su group, meanonly
	*the h(1) element means trim 0.5% of sample at each end, right?
forval i  = 1/`r(max)' {
	capture { 
		winsor euro_usdEX if group == `i', gen(work) h(1)
		replace winsorised = work if group == `i'
		drop work
   }
}
bysort year: gen total_days = _N
bysort year: gen euro_usdEXAvgAnnual = sum(euro_usdEX)
bysort year: replace euro_usdEXAvgAnnual = euro_usdEXAvgAnnual[_N]
bysort year: replace euro_usdEXAvgAnnual = euro_usdEXAvgAnnual/total_days
drop group observation euro_usdEX str_date day month winsorised total_days
duplicates drop
destring year, replace
save "Data/cleaned_data/Euro_USDExchangeRate.dta", replace

//Financial Stability Index (CISS)
import excel "Data/ECB Data Portal long_20250429180843.xlsx", sheet("DATA(CISS)") cellrange(B1:C9592) firstrow clear
	*from daily to annual
gen year = substr(TIMEPERIOD, -4, 4)
drop TIMEPERIOD
replace OBSVALUE = OBSVALUE[_n-1] if missing(OBSVALUE)
bysort year: gen total_days = _N
bysort year: gen CISSIndex = sum(OBSVALUE)
bysort year: replace CISSIndex = CISSIndex[_N]
bysort year: replace CISSIndex = CISSIndex/total_days
drop OBSVALUE total_days
duplicates drop
destring year, replace
save "Data/cleaned_data/CISS.dta", replace